Using the Species distribution modeling techniques provided by the EcoCommons Platform (www.ecocommons.org.au), we produced probability distribution maps for the three Queensland endangered species: koala, brush tailed rock-wallaby, and beach stone curlew.
Then we adjusted the probability distribution maps of these three species with the planning units shapefile prepared by the Marxan MaPP, and ran four planning scenarios with a target of expanding the coverage of protected areas in QLD to 30%.
Note: to replicate this project on your own device, please go to Readme.md
EcoCommons Outputs
Species records pulled from GBIF, ALA, EcoPlots, OBIS
Species distribution modelling output: Species distribution Probability maps (This is the input tested in this project).
Marxan MaPP Inputs
Shapefile of planning area and units.
Shapefile of cost.
Shapefile and csv of biodiversity features (Where EcoCommons can help!).
# important!!! if you are not in the directory of this project, place run the below code# setwd(“/replace_this_with_your_actual_directory/ecocommons-marxan-integration-poc”)
Activate the virtual environment “renv” to install and load all essential packages
# install "renv" package if not been installedif (!requireNamespace("renv", quietly =TRUE)) {install.packages("renv")}if (!file.exists("renv/activate.R")) {message("renv has not been initiated in this project. Initializing now...") renv::init() # This initializes renv if it's not set up} else {source("renv/activate.R") # This ensures the renv environment is activatedmessage("renv is activated.")}# Ensure renv is installed and initializedif (file.exists("renv.lock")) { renv::restore()} else {message("No renv.lock file found in the current directory.")}
Install and load essential packages
# Set CRAN mirroroptions(repos =c(CRAN ="https://cran.rstudio.com/"))# First specify the packages of interestpackages =c("shiny", "sf", "terra", "ggplot2", "ggspatial", "raster", "dplyr", "shiny", "httpuv", "rmarkdown", "knitr", "jsonlite", "reticulate", "htmltools", "pryr")# Now load or install&load all. This process will take a long time since we are using a virtual environment and install a lot of packages.package.check <-lapply( packages,FUN =function(x) {if (!require(x, character.only =TRUE)) {install.packages(x, dependencies =TRUE)library(x, character.only =TRUE) } } )Sys.setenv(PROJ_LIB ="/usr/local/Cellar/proj/9.5.0/share/proj")renv::snapshot()
1. We get the QLD planning units from Marxan MaPP
QLD_Unit <-"qld_3species_Marxan/QLD_plannningunits/cost-surface-template.shp"#This cost-surface-template was prepared by the Marxan Mapp with a resolution of 189 Km2, which is the highest resolution Marxan Mapp can give at this scale.QLD_Unit <-st_read(QLD_Unit)QLD_Unit <-st_simplify(QLD_Unit , dTolerance =0.01) # Calculate the resolution since Marxan MaPP for visulization purposeareas <-st_area(QLD_Unit)areas_numeric <-as.numeric(areas)average_area <-mean(areas_numeric)# Convert to numericaverage_area_km2 <- average_area /1e6# Get the number of rowsn_rows <-nrow(QLD_Unit)# Plot the shapefile with no fill color and number of rows in the titleggplot(data = QLD_Unit) +geom_sf(fill =NA, color ="gray") +theme_minimal() +ggtitle(paste("QLD Planning Units:", n_rows, "\n","Resolution of planning in square kilometers:", round(average_area_km2)))+theme(plot.title =element_text(hjust =0.5)) # Center the title
2. I made a cost layer using the reciprocal of the distance to state-owned road as a surrogate of the cost.
The assumption is: the closer to the state owned road, the more expensive to purchase the unit.
QLD_cost_road <-st_read("qld_3species_Marxan/QLD_Cost/QLD_cost_road.shp")# Plot the shapefile with continuous cost_road valuesggplot(QLD_cost_road) +geom_sf(aes(fill = cost_road)) +scale_fill_continuous(name ="Cost",low ="lightblue", high ="red",labels =c("0 (Low cost)", "1 (High cost)"),breaks =c(0.01, 1)) +theme_minimal() +labs(title ="Cost: using the distance to road of each Unit as a proxy")+theme(plot.title =element_text(hjust =0.5)) # Center the title
3. Biodiversity features. I used EcoCommons to produce three species’ SDM to start with.
Species 1: koala
Species 2: brush tailed rock-wallaby
Species 3: beach stone curlew
# Define the folder path where the rasters are storedfolder_path <-"qld_3species_Marxan/QLD_feature/"# Get a list of all .tif files in the folderraster_files <-list.files(path = folder_path, pattern ="\\.tif$", full.names =TRUE)# Extract the species names from the file names (removing the folder path and .tif extension)species_names <- tools::file_path_sans_ext(basename(raster_files))# Read all raster files in one go using lapplyraster_list <-lapply(raster_files, rast) # Use rast() from terra for reading rasters# Using QLD_Unit as the spatial vector for masking# Transform the raster CRS to match the vector CRS and apply masking in one stepraster_list <-lapply(raster_list, function(r) { r_transformed <-project(r, crs(vect(QLD_Unit)))mask(r_transformed, vect(QLD_Unit))})# Function to convert rasters to data frames and combine themprepare_raster_data <-function(raster_list, species_names) {# Initialize an empty data frame combined_df <-data.frame()# Loop through each raster and combine them into one data framefor (i inseq_along(raster_list)) {# Convert raster to a data frame raster_df <-as.data.frame(raster_list[[i]], xy =TRUE)# Rename the third column to 'value' or any appropriate name for the raster valuesnames(raster_df)[3] <-"value"# Add a column to identify the species name raster_df$species <- species_names[i]# Combine the raster data with the overall data frame combined_df <-bind_rows(combined_df, raster_df)}return(combined_df)}# Prepare the combined data framecombined_raster_df <-prepare_raster_data(raster_list, species_names)# Create the ggplot with facet_wrap to display each raster in a separate facetggplot(combined_raster_df, aes(x = x, y = y, fill = value)) +# Use the correct column name for fillgeom_raster()+facet_wrap(~ species, ncol =3) +# Adjust ncol to control the number of columnsscale_fill_viridis_c() +# You can adjust the color scale as neededlabs(title ="Species SDM") +theme_minimal() +theme(plot.title =element_text(hjust =0.5))
4. We need to turn these SDMs to binary results (shapefies).
# Define the folder paths where the rasters and CSV files are storedfolder_path_rasters <-"qld_3species_Marxan/QLD_feature/"folder_path_csvs <-"qld_3species_Marxan/model_evaluation/"output_dir <-"qld_3species_Marxan/QLD_feature/Marxan_feature_input/"QLD_Unit <-"qld_3species_Marxan/QLD_plannningunits/cost-surface-template.shp"#This cost-surface-template was prepared by the Marxan Mapp with a resolution of 189 Km2, which is the highest resolution Marxan Mapp can give at this scale.QLD_Unit <-st_read(QLD_Unit)QLD_Unit <-st_simplify(QLD_Unit , dTolerance =0.01) # Get a list of all .tif files and CSV files in the folderraster_files <-list.files(path = folder_path_rasters, pattern ="\\.tif$", full.names =TRUE)csv_files <-list.files(path = folder_path_csvs, pattern ="\\.csv$", full.names =TRUE)# Extract the species names from the file names (removing the folder path and .tif/.csv extension)species_names <- tools::file_path_sans_ext(basename(raster_files))# Read all raster files in one go using lapplyraster_list <-lapply(raster_files, rast) # Use rast() from terra for reading rasters# Transform the raster CRS to match the vector CRS and apply masking in one stepraster_list <-lapply(raster_list, function(r) { r_transformed <-project(r, crs(vect(QLD_Unit)))mask(r_transformed, vect(QLD_Unit))})# Prepare a named list of rastersspecies_rasters <-setNames(raster_list, species_names)species_csvs <-setNames(csv_files, species_names)# Define UI for the applicationui <-fluidPage(titlePanel("Interactive TSS-based threshold for the probability of presence and absence of Species"),# Use a loop to create a row for each specieslapply(species_names, function(species) {fluidRow(column(3, h4(paste("Species:", species)),sliderInput(paste0("tss_value_", species), "Select TSS Value:", min =0, max =1, value =0.5, step =0.01),actionButton(paste0("run_analysis_", species), "Run Species Analysis"),br(),textOutput(paste0("tpr_tnr_", species)) ),column(4, plotOutput(paste0("plot_", species), width ="400px") ),column(5, plotOutput(paste0("species_plot_", species)) ) ) }))# Define server logicserver <-function(input, output, session) { selected_raster <-function(species) { species_rasters[[species]] } species_eval_data <-function(species) { csv_path <- species_csvs[[species]]if (!file.exists(csv_path)) {showNotification(paste("CSV file for", species, "not found!"), type ="error")return(NULL) } eval_data <-read.csv(csv_path)if (!all(c("tpr", "tnr", "tpv") %in%names(eval_data))) {showNotification(paste("Required columns missing in CSV for", species), type ="error")return(NULL) }if (nrow(eval_data) ==0) {showNotification(paste("No data found in CSV for", species), type ="error")return(NULL) } eval_data$tss <-round(eval_data$tpr + eval_data$tnr -1, 3)return(eval_data) }lapply(species_names, function(species) { eval_data <-species_eval_data(species)if (!is.null(eval_data)) { min_tss <-min(eval_data$tss, na.rm =TRUE) max_tss <-max(eval_data$tss, na.rm =TRUE)updateSliderInput(session, paste0("tss_value_", species), min = min_tss, max = max_tss, value = max_tss,step =0.01) }observeEvent(input[[paste0("tss_value_", species)]], {if (!is.null(eval_data)) { row <-which.min(abs(eval_data$tss - input[[paste0("tss_value_", species)]])) tpr <- eval_data$tpr[row] tnr <- eval_data$tnr[row] output[[paste0("tpr_tnr_", species)]] <-renderText({paste0("TPR (Sensitivity): ", round(tpr, 3), ", TNR (Specificity): ", round(tnr, 3)) }) } }) output[[paste0("plot_", species)]] <-renderPlot({if (is.null(eval_data)) return(NULL)ggplot(eval_data, aes(x = tpv)) +geom_line(aes(y = tpr, colour ="TPR"), linewidth =1) +geom_line(aes(y = tnr, colour ="TNR"), linewidth =1) +geom_line(aes(y = tss, colour ="TSS"), linewidth =1) +geom_vline(xintercept = eval_data$tpv[which.min(abs(eval_data$tss - input[[paste0("tss_value_", species)]]))],linetype ="dotted", color ="red", linewidth =1) +labs(title =paste("Sensitivity, Specificity, and TSS for", species),x ="Threshold Probability Value",y ="Value") +scale_colour_manual(values =c("TPR"="blue", "TNR"="green", "TSS"="red")) +theme_minimal() })observeEvent(input[[paste0("run_analysis_", species)]], { species_shp <-process_species(selected_raster(species), QLD_Unit, species, output_dir, input[[paste0("tss_value_", species)]]) output[[paste0("species_plot_", species)]] <-renderPlot({ggplot() +geom_sf(data = QLD_Unit, fill =NA, color ="grey") +geom_sf(data = species_shp, aes(fill = feature), color =NA) +scale_fill_viridis_c(option ="plasma") +labs(title =paste("Species Distribution for", species),x ="Longitude", y ="Latitude") +theme_minimal() }) }) })}process_species <-function(raster_data, planning_unit, species_name, output_dir, tss_threshold) { raster_data_transformed <-project(raster_data, crs(vect(planning_unit))) extracted_values <-extract(raster_data_transformed, vect(planning_unit), fun = mean, na.rm =TRUE)names(planning_unit)[names(planning_unit) =="cost"] <-"feature" planning_unit$feature <- extracted_values[, 2] QLD_species <-subset(planning_unit, feature >= tss_threshold) shapefile_base <-file.path(output_dir, species_name)st_write(QLD_species, paste0(shapefile_base, ".shp"), delete_layer =TRUE)return(QLD_species)}# Run the application shinyApp(ui = ui, server = server)
5. Plot species SDM binary shapefile outputs for double check
output_dir <-"qld_3species_Marxan/QLD_feature/Marxan_feature_input/"# List all the shapefiles in the directory (assuming each species has its own shapefile)species_files <-list.files(output_dir, pattern ="\\.shp$", full.names =TRUE)species_files# Extract species names from the filenames (you can adjust this depending on your naming conventions)species_names <- tools::file_path_sans_ext(basename(species_files))# Load all species shapefiles and add a species identifierspecies_sf_list <-lapply(seq_along(species_files), function(i) { sf <-st_read(species_files[i]) sf$species <- species_names[i] # Add species name columnreturn(sf)})# Combine all species into one datasetcombined_species_sf <-do.call(rbind, species_sf_list)# Plot the unit (base map) first and overlay the species habitats without borderscombined_plot_with_unit <-ggplot() +geom_sf(data = QLD_Unit, fill =NA, color ="grey", linewidth =0.01) +# Base map (QLD Unit)geom_sf(data = combined_species_sf, aes(fill = species), color =NA) +# No borders for speciesscale_fill_manual(values = RColorBrewer::brewer.pal(n =length(species_names), name ="Set1")) +# Automatically assign colorstheme_minimal() +labs(title ="Species Habitats within QLD Unit",subtitle =paste(species_names, collapse =", ")) +# List all species in subtitletheme(legend.title =element_blank())# Display the plotprint(combined_plot_with_unit)
6. We can also make a species presence and absence csv table.
# Function to extract presence (1) and absence (0) from raster based on a threshold (e.g., 0.5)extract_presence_absence <-function(raster_data, unit) { extracted_values <-extract(raster_data, vect(unit), fun = mean, na.rm =TRUE) presence_absence <-ifelse(extracted_values[, 2] >=0.5, 1, 0)return(presence_absence)}# Create an empty presence-absence data framepresence_absence_df <-data.frame(puid = QLD_Unit$puid) # Assuming 'puid' is the unique identifier# Loop through each species raster in the raster list and extract presence-absence datafor (i inseq_along(raster_list)) {# Generate a dynamic presence column name for the current species presence_col_name <-paste0(species_names[i], "_presence")# Extract presence/absence data and add it to the presence-absence dataframe presence_absence_df[[species_names[i]]] <-extract_presence_absence(raster_list[[i]], QLD_Unit)}# Write the final presence-absence data frame to a CSV fileoutput_csv <-file.path(output_dir, "presence_absence_species.csv")write.csv(presence_absence_df, output_csv, row.names =FALSE)# Check the CSV outputprint(head(presence_absence_df))